%  =======================================================================================================================================================
% 
%  Code Description: 
%  This codefile builds the IFS-Base dataset from the raw data provided by the Bundesbank's Research Data and Service Centre (RDSC)
% 
%  =======================================================================================================================================================
% 
% 
%  General disclaimer:
%  This directory contains the replication code for "Connected Funds". 
%  Because we cannot share the underlying data provided by the Bundesbank's Research Data and Service Centre (RDSC) and other subscription data sources, 
%  we have included pseudo data to show how the raw data are formatted. 
%  Other researchers can go through a similar approval and subscription process to obtain the underlying data. (2023-04-06)
% 
%  =======================================================================================================================================================


% Start from an empty workspace and close all open figure windows.
clear all;
close all;

% Root folder of the replication package. The trailing backslash is required
% because the file paths in this script are built by plain concatenation
% (strcat). The semicolon suppresses echoing the path to the command window.
projectPath = 'C:\ConnectedFunds_Codebase\';

% Add functions folders
addpath(strcat(projectPath, 'Code\matlab_functions'));


%  ========================================== Re-classify misclassified funds based on empirical securities holdings ================================================


% Load the cleaned fund-level data. The code below uses the loaded variables
% IFS_All (fund-level table) and umonth (list of months to process;
% presumably yyyymm numbers -- TODO confirm against the file that creates it).
load(strcat(projectPath, 'Data\IFS\IFS_All_CleanData'));

% One table of fund-level holdings aggregates per month. The monthly tables
% are stacked once after the loop instead of growing Results in every
% iteration, which would re-copy the accumulated table each month.
% (Temporary names start with "t" so that the final "clear t*" removes them.)
tmpMonthlyResults = cell(length(umonth), 1);

Start_Timer = datetime;

for t = 1 : length(umonth)

    % Progress indicator
    fprintf('Processing month %s\n', num2str(umonth(t)));

    % Security-by-security holdings of the month, with and without ISIN
    load(strcat(projectPath, 'Data\IFS\mat\IFS_Holdings_', num2str(umonth(t))), 'IFS_Holdings');
    load(strcat(projectPath, 'Data\IFS\mat\IFS_Holdings_NoISIN_', num2str(umonth(t))), 'IFS_Holdings_NoISIN');

    IFS_Holdings_All = [IFS_Holdings; IFS_Holdings_NoISIN]; 

    % Security master data of the month; its ISIN column is renamed so it can
    % be matched against the SECCODE key of the holdings table
    load(strcat(projectPath, 'Data\CSDB\mat\CSDB_', num2str(umonth(t))));    
    CSDB.Properties.VariableNames("ISIN") = "SECCODE";

    % Attach the instrument classification to every holding. Months before
    % 201412 use the ESA 1995 column, later months the ESA 2010 column; both
    % are stored under the common name ESA_INS_CODE.
    if umonth(t)<201412
        IFS_Holdings_All = outerjoin(IFS_Holdings_All, CSDB(:, {'SECCODE', 'ESA_INS_1995'}), Keys="SECCODE", MergeKeys=true, Type='left');
        IFS_Holdings_All.Properties.VariableNames("ESA_INS_1995") = "ESA_INS_CODE";
        % Replace '.' by '_' so the prefix tests below ('F_51', ...) also apply to these codes
        IFS_Holdings_All.ESA_INS_CODE = strrep(IFS_Holdings_All.ESA_INS_CODE, '.', '_');
    else
        IFS_Holdings_All = outerjoin(IFS_Holdings_All, CSDB(:, {'SECCODE', 'ESA_INS_2010'}), Keys="SECCODE", MergeKeys=true, Type='left');
        IFS_Holdings_All.Properties.VariableNames("ESA_INS_2010") = "ESA_INS_CODE";
    end
    
    % Attach the fund's total assets (bilanzsumme) of the same month
    IFS_Holdings_All.ISIN = cellstr(IFS_Holdings_All.ISIN);
    IFS_Holdings_All = outerjoin(IFS_Holdings_All, IFS_All(IFS_All.DATUM == umonth(t), {'ISIN', 'bilanzsumme'}), Keys="ISIN", MergeKeys=true, Type='left');
    
    % Flag each holding by the prefix of its instrument code
    IFS_Holdings_All.isWP     = startsWith(IFS_Holdings_All.ESA_INS_CODE, 'F');
    IFS_Holdings_All.isEquity = startsWith(IFS_Holdings_All.ESA_INS_CODE, 'F_51');
    IFS_Holdings_All.isFund   = startsWith(IFS_Holdings_All.ESA_INS_CODE, 'F_52');
    IFS_Holdings_All.isBond   = startsWith(IFS_Holdings_All.ESA_INS_CODE, 'F_3');

    % Sum AMOUNT per (fund, flag) combination; only the rows with flag == 1
    % are used further below
    [tmpEquities, ~, idxWhereFundsEquityholdings] = unique(IFS_Holdings_All(:, {'ISIN', 'isEquity'}));
    tmpEquities.EquityHoldings = accumarray(idxWhereFundsEquityholdings, IFS_Holdings_All.AMOUNT);
    [tmpBonds, ~, idxWhereFundsBondholdings] = unique(IFS_Holdings_All(:, {'ISIN', 'isBond'}));
    tmpBonds.BondHoldings = accumarray(idxWhereFundsBondholdings, IFS_Holdings_All.AMOUNT);
    [tmpFunds, ~, idxWhereFundsFundholdings] = unique(IFS_Holdings_All(:, {'ISIN', 'isFund'}));
    tmpFunds.FundHoldings = accumarray(idxWhereFundsFundholdings, IFS_Holdings_All.AMOUNT);
    [tmpWPs, ~, idxWhereFundsWPHoldings] = unique(IFS_Holdings_All(:, {'ISIN', 'isWP'}));
    tmpWPs.WPHoldings = accumarray(idxWhereFundsWPHoldings, IFS_Holdings_All.AMOUNT);

    % Fund-level table of the month with the four aggregates attached
    tmpResults = unique(IFS_Holdings_All(:, {'DATUM', 'ISIN', 'bilanzsumme'}));
    tmpResults = outerjoin(tmpResults, tmpEquities(tmpEquities.isEquity == 1, {'ISIN', 'EquityHoldings'}), Keys="ISIN", MergeKeys=true, Type='left');
    tmpResults = outerjoin(tmpResults, tmpBonds(tmpBonds.isBond == 1, {'ISIN', 'BondHoldings'}), Keys="ISIN", MergeKeys=true, Type='left');
    tmpResults = outerjoin(tmpResults, tmpFunds(tmpFunds.isFund == 1, {'ISIN', 'FundHoldings'}), Keys="ISIN", MergeKeys=true, Type='left');
    tmpResults = outerjoin(tmpResults, tmpWPs(tmpWPs.isWP == 1, {'ISIN', 'WPHoldings'}), Keys="ISIN", MergeKeys=true, Type='left');
    
    % Funds without any holding of a given type get NaN from the left join; treat as zero
    tmpResults.EquityHoldings(isnan(tmpResults.EquityHoldings)) = 0;
    tmpResults.BondHoldings(isnan(tmpResults.BondHoldings))     = 0;
    tmpResults.FundHoldings(isnan(tmpResults.FundHoldings))     = 0;
    tmpResults.WPHoldings(isnan(tmpResults.WPHoldings))         = 0;

    % Holdings relative to total assets
    tmpResults.equityshare = tmpResults.EquityHoldings ./ tmpResults.bilanzsumme;
    tmpResults.bondshare   = tmpResults.BondHoldings ./ tmpResults.bilanzsumme;
    tmpResults.fundshare   = tmpResults.FundHoldings ./ tmpResults.bilanzsumme;
    tmpResults.wpshare     = tmpResults.WPHoldings ./ tmpResults.bilanzsumme;

    tmpMonthlyResults{t} = tmpResults;

end

% Stack all months in chronological loop order (same row order as appending
% month by month)
Results = vertcat(tmpMonthlyResults{:});

Stop_Timer = datetime;

% Compute runtime
Duration = Stop_Timer - Start_Timer;
disp(['Get portfolio data: Total execution time is ', char(Duration), '.']);
clearvars Start_Timer Stop_Timer Duration

% Attach the monthly holdings aggregates to the fund-level panel (one row per DATUM/ISIN).
IFS_All = outerjoin(IFS_All, unique(Results(:, {'DATUM', 'ISIN', 'EquityHoldings', 'BondHoldings', 'FundHoldings', 'WPHoldings'})), Keys={'DATUM', 'ISIN'}, MergeKeys=true, Type='left');

% NaN-ignoring mean from base MATLAB. Used instead of nanmean, which requires
% the Statistics and Machine Learning Toolbox. (Temporary names start with
% "t" so that the final "clear t*" removes them.)
tmpMeanOmitNan = @(x) mean(x, 'omitnan');

% One row per fund: average portfolio shares over all months in Results
[tmpFundISIN, ~, idxWhereFundISIN]      = unique(Results.ISIN);
Reclassification_Table                  = table(tmpFundISIN, 'VariableNames', {'ISIN'});
Reclassification_Table.mean_equityshare = accumarray(idxWhereFundISIN, Results.equityshare, [], tmpMeanOmitNan);
Reclassification_Table.mean_bondshare   = accumarray(idxWhereFundISIN, Results.bondshare, [], tmpMeanOmitNan);
Reclassification_Table.mean_fundshare   = accumarray(idxWhereFundISIN, Results.fundshare, [], tmpMeanOmitNan);
Reclassification_Table.mean_wpshare     = accumarray(idxWhereFundISIN, Results.wpshare, [], tmpMeanOmitNan);

% Drop funds for which an average equity, bond or fund share could not be computed
Reclassification_Table                     = Reclassification_Table(~isnan(Reclassification_Table.mean_equityshare) & ~isnan(Reclassification_Table.mean_bondshare) & ~isnan(Reclassification_Table.mean_fundshare), :);
Reclassification_Table.isEquityFund        = zeros(size(Reclassification_Table, 1), 1);
Reclassification_Table.isBondFund          = zeros(size(Reclassification_Table, 1), 1);
Reclassification_Table.isMixedSecurityFund = zeros(size(Reclassification_Table, 1), 1);
Reclassification_Table.isFundOfFund        = zeros(size(Reclassification_Table, 1), 1);

% A fund is assigned to a category if the corresponding average share exceeds 50%.
% Mixed security funds are the remainder: none of the three specific
% categories applies, but securities overall exceed 50%.
Reclassification_Table.isEquityFund(Reclassification_Table.mean_equityshare > 0.5)  = 1;
Reclassification_Table.isBondFund(Reclassification_Table.mean_bondshare > 0.5)      = 1;
Reclassification_Table.isFundOfFund(Reclassification_Table.mean_fundshare > 0.5)    = 1;
Reclassification_Table.isMixedSecurityFund(~Reclassification_Table.isEquityFund & ~Reclassification_Table.isBondFund & ~Reclassification_Table.isFundOfFund & Reclassification_Table.mean_wpshare > 0.5) = 1;

% Sanity check: number of categories assigned per fund; report how many funds
% ended up in more than one category
Reclassification_Table.control = Reclassification_Table.isEquityFund + Reclassification_Table.isBondFund + Reclassification_Table.isMixedSecurityFund + Reclassification_Table.isFundOfFund;
fprintf('Funds assigned to more than one category: %d\n', nnz(Reclassification_Table.control > 1));

% Attach the fund-level classification flags to every row of the panel
IFS_All    = outerjoin(IFS_All, Reclassification_Table, 'Keys', 'ISIN', 'MergeKeys', true, 'Type', 'left');

% Only re-classify if
%   - the fund is classified as "Other fund" (FCAT = 14), and
%   - its WP holdings are smaller than or equal to the fund's total assets (bilanzsumme).
% Rows where WPHoldings or bilanzsumme is NaN fail the comparison and therefore
% keep their original category.
tmpEligible = ismember(IFS_All.FCAT, 14) & IFS_All.WPHoldings <= IFS_All.bilanzsumme;

% artmittel starts as a copy of FCAT and is overwritten for eligible funds:
% 1 = equity fund, 2 = bond fund, 3 = mixed security fund, 7 = fund of funds.
% Later assignments win if a fund carries more than one flag.
IFS_All.artmittel = IFS_All.FCAT;
IFS_All.artmittel(tmpEligible & IFS_All.isEquityFund == 1)        = 1;
IFS_All.artmittel(tmpEligible & IFS_All.isBondFund == 1)          = 2;
IFS_All.artmittel(tmpEligible & IFS_All.isMixedSecurityFund == 1) = 3;
IFS_All.artmittel(tmpEligible & IFS_All.isFundOfFund == 1)        = 7;

% Remove temporaries so they are not written to the data file
clear t* IFS_Holdings* CSDB idx* date Reclassification* Results* ans;

% Save all remaining workspace variables except projectPath: storing the
% machine-specific path in the data file would silently overwrite projectPath
% in any script that loads this file later.
save(strcat(projectPath, 'Data\IFS\IFS_All_CleanData'), '-regexp', '^(?!projectPath$).');